if (!require("dplyr")) {install.packages("dplyr"); require("dplyr")}
if (!require("Seurat")) {install.packages("Seurat"); require("Seurat")}
if (!require("SeuratDisk")) {install.packages("SeuratDisk"); require("SeuratDisk")}
if (!require("knitr")) {install.packages("knitr"); require("knitr")}
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!requireNamespace('BiocManager', quietly = TRUE)) {install.packages('BiocManager'); require("BiocManager")}
if (!require("here")) {install.packages("here"); require("here")}
if (!require("paletteer")) {install.packages("paletteer"); require("paletteer")} # color palette
if (!require("grDevices")) {install.packages("grDevices"); require("grDevices")} # for grDevices palette
if (!require("tidyverse")) {install.packages("tidyverse"); require("tidyverse")} # for data frame transformation
if (!require("tibble")) {install.packages("tibble"); require("tibble")} # for table transformation
if (!require("geneName")) {install.packages("geneName"); require("geneName")}
if (!require("ggrepel")) {install.packages("ggrepel"); require("ggrepel")}
if (!require("gghighlight")) {install.packages("gghighlight"); require("gghighlight")}
if (!require("ggpmisc")) {install.packages("ggpmisc"); require("ggpmisc")}
if (!require("ggupset")) {install.packages("ggupset"); require("ggupset")}
if (!require("RColorBrewer")) {install.packages("RColorBrewer"); require("RColorBrewer")}
if (!require("viridis")) {install.packages("viridis"); require("viridis")}

library(SeuratData)
library(openxlsx)
library(gplots)
library(ggvenn)

1 Introduction

This file was made to explore the expression of KRT19 in TAL snRNASeq data from the KPMP 2021 snRNAseq dataset for Vidhi Dalal.

2 2021 KPMP Object

2.1 Load KPMP object (2021 data file)

This is the KPMP object that was originally formatted by Xiao-Tong Su (manually annotated meta-data) and then subsetted by Jessica Bahena Lopez for her TAL heterogenity manuscript.

KPMP.TAL <- readRDS(here("KPMP.TAL_2.rds")) 

head(KPMP.TAL@meta.data)
KPMP.TAL@meta.data$subclass.l2 <- factor(KPMP.TAL@meta.data$subclass.l2, levels = c("C-TAL", "M-TAL", "aTAL1", "aTAL2", "dC-TAL", "dM-TAL", "MD"))


DimPlot(KPMP.TAL, group.by = "subclass.l1")

DimPlot(KPMP.TAL, group.by = "subclass.l2", label = T)

KPMP.TAL
## An object of class Seurat 
## 29732 features across 35424 samples within 1 assay 
## Active assay: RNA (29732 features, 5526 variable features)
##  3 layers present: counts, data, scale.data
##  3 dimensional reductions calculated: pca, tsne, umap

2.2 VlnPlot -> Heatmap

gene_list <- list(c("SLC12A1", "CRYAB", "KRT19", "MMP7", "CLU", "SPP1"))

gene_unlist <- unlist(c("SLC12A1", "CRYAB", "KRT19", "MMP7", "CLU", "SPP1"))

VlnPlot(KPMP.TAL, features = gene_unlist, group.by = "subclass.l2", pt.size = 0.1)

# Create a dataframe with the average expression for the list of genes

df <- AverageExpression(object = KPMP.TAL, features = gene_unlist, group.by = 'subclass.l2')$RNA

df
##              C-TAL      M-TAL      aTAL1      aTAL2     dC-TAL    dM-TAL
## SLC12A1 64.6614275 78.7514259 15.6880536 20.4932148  14.816813 42.571376
## CRYAB    0.1801357  0.3792102  0.5580475  0.5916533   9.150091  1.314851
## KRT19    0.1646312  0.3307886  0.6143769  0.4947854   5.244709  2.094962
## MMP7     0.1497348  0.1068843  1.0871972  1.7900269  15.009163  1.037691
## CLU      0.2266637  0.3040202  0.7711453  1.0898689  13.479982  2.338266
## SPP1     2.9975088  1.4675743  8.1901007 23.4953085 104.092847 19.745206
##                 MD
## SLC12A1 44.4480911
## CRYAB    0.1844493
## KRT19    0.2642673
## MMP7     0.1830210
## CLU      0.2682441
## SPP1     4.1327308
# Calculate Z score for each row (could skip if you want raw expression)

df <- t(scale(t(df)))

# convert df to tidy format

df_tidy <- df %>%
  as.data.frame() %>%
  rownames_to_column(var = "Gene") %>%
  pivot_longer(cols = -Gene, names_to = "subclass.l2", values_to = "Expression")
               
# Graph with geom_tile

f1 <- ggplot(df_tidy, aes(x = subclass.l2, y = Gene, fill = Expression)) +
  geom_tile()

f1

# Make pretty with ggplot2

f2 <- ggplot(df_tidy, aes(x = subclass.l2, y = Gene, fill = Expression)) +
  geom_tile() +
  scale_fill_distiller(palette = "RdYlBu") +
  theme_minimal()

f2

# Reorder the genes by factoring

df_tidy$Gene <- factor(df_tidy$Gene, levels = c("CRYAB",  "MMP7", "CLU", "SPP1", "KRT19","SLC12A1"))

# Reorder sublcass.l2 by factoring

df_tidy$subclass.l2 <- factor(df_tidy$subclass.l2, levels = c("C-TAL", "M-TAL", "aTAL1", "aTAL2", "dC-TAL", "dM-TAL", "MD"))

2.3 DimPlots of TAL by groups

KPMP.TAL@meta.data$Enrollment.Category <- factor(KPMP.TAL@meta.data$Enrollment.Category, levels = c("Healthy Reference", "AKI", "CKD"))

DimPlot(KPMP.TAL, group.by = "subclass.l2", label = T, split.by = "Enrollment.Category")

KPMP.TAL@meta.data
KPMP.TAL@meta.data$Proteinuria..mg...Binned. <- factor(KPMP.TAL@meta.data$Proteinuria..mg...Binned., levels = c("<150 mg/g cr", "150 to <500 mg/g cr", "500 to <1000 mg/g cr", ">=1000 mg/g cr", ""))

DimPlot(KPMP.TAL, group.by = "subclass.l2", label = T, split.by = "Proteinuria..mg...Binned.")

2.4 FeaturePlots of TAL for KRT19

FeaturePlot(KPMP.TAL, "KRT19", order = T)

FeaturePlot(KPMP.TAL, "KRT19", split.by = "Enrollment.Category", order = T)

FeaturePlot(KPMP.TAL, "KRT19", split.by = "Proteinuria..mg...Binned.", order = T)

2.5 VlnPlots of TAL for KRT19

VlnPlot(KPMP.TAL, "KRT19", group.by = "Enrollment.Category")

VlnPlot(KPMP.TAL, "KRT19", group.by = "Proteinuria..mg...Binned.")

VlnPlot(KPMP.TAL, "KRT19", group.by = "subclass.l2")

2.6 DotPlots of TAL for KRT19

2.6.1 Scaled

DotPlot(
  KPMP.TAL,
  assay = NULL,
  features = "KRT19",
  cols = c("lightgrey", "blue"),
  col.min = -2.5,
  col.max = 2.5,
  dot.min = 0,
  dot.scale = 6,
  idents = NULL,
  group.by = "Enrollment.Category",
  split.by = NULL,
  cluster.idents = FALSE,
  scale = T,
  scale.by = "radius",
  scale.min = NA,
  scale.max = NA
)

DotPlot(
  KPMP.TAL,
  assay = NULL,
  features = "KRT19",
  cols = c("lightgrey", "blue"),
  col.min = -2.5,
  col.max = 2.5,
  dot.min = 0,
  dot.scale = 6,
  idents = NULL,
  group.by = "subclass.l2",
  split.by = NULL,
  cluster.idents = FALSE,
  scale = T,
  scale.by = "radius",
  scale.min = NA,
  scale.max = NA
)

2.6.2 Unscaled

DotPlot(
  KPMP.TAL,
  assay = NULL,
  features = "KRT19",
  cols = c("lightgrey", "blue"),
  col.min = -2.5,
  col.max = 2.5,
  dot.min = 0,
  dot.scale = 6,
  idents = NULL,
  group.by = "Enrollment.Category",
  split.by = NULL,
  cluster.idents = FALSE,
  scale = F,
  scale.by = "radius",
  scale.min = NA,
  scale.max = NA
)

DotPlot(
  KPMP.TAL,
  assay = NULL,
  features = "KRT19",
  cols = c("lightgrey", "blue"),
  col.min = -2.5,
  col.max = 2.5,
  dot.min = 0,
  dot.scale = 6,
  idents = NULL,
  group.by = "subclass.l2",
  split.by = NULL,
  cluster.idents = FALSE,
  scale = F,
  scale.by = "radius",
  scale.min = NA,
  scale.max = NA
)

3 Correlating KRT19 cells with VD New TAL

3.1 Add meta.data column called “KRT19status” depending on whether a cell expressed KRT19 or not

KPMP.TAL@meta.data$KRT19status <- ifelse(GetAssayData(KPMP.TAL, assay = "RNA", slot = "data")["KRT19", ] > 0, "yes", "no")

DimPlot(KPMP.TAL, group.by = "KRT19status", label = T)

VlnPlot(KPMP.TAL, "KRT19", assay = "RNA", group.by = "KRT19status")

Idents(KPMP.TAL) <- "KRT19status"

DimPlot(KPMP.TAL)

KRT19.DEGs <- FindMarkers(KPMP.TAL, ident.1 = "yes", ident.2 = "no")

KRT19.DEGs
VD.DEGs <- read.xlsx(here("DEG_NewTAL_vs_TAL.xlsx"))

# change x1 to "gene"

VD.DEGs <- VD.DEGs %>% rename(gene = X1)

x.markers <- VD.DEGs


x.markers_tb <- x.markers %>% data.frame() %>% filter(p_val_adj < 0.05) %>% filter(abs(avg_log2FC) > .5) %>%  as_tibble()

top6 <- list(head(x.markers_tb$gene))

x.markers_tb_H <- mousegnameConverter(x.markers_tb, "gene")
## ================================================================================
x.markers_tb_H = x.markers_tb_H[order(x.markers_tb_H[,"avg_log2FC"], decreasing = TRUE),]
x.markers_tb_H
# move the KRT19.DEG rownames to be a column called "gene"

y.markers <- KRT19.DEGs

y.markers_tb <- y.markers %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>%
  filter(p_val_adj < 0.05) %>%
  filter(abs(avg_log2FC) > .5) %>% 
  as_tibble()

    #X-Y DEGs Intersection Table
    
    xy.comp.TAL <- inner_join(x.markers_tb_H, y.markers_tb, by = "gene")
    
    xy.comp.TAL 
    #Set Range for Far Right Data Points
    df.upper <- subset(xy.comp.TAL, avg_log2FC.x > -.32 & avg_log2FC.y > -.32)
    #Set Range for Far Left Data Points
    df.lower <- subset(xy.comp.TAL, avg_log2FC.x < 0.32 & avg_log2FC.y < .32)
    
    xy.comp.TAL.plot <- ggplot(xy.comp.TAL, aes(x = avg_log2FC.x, y = avg_log2FC.y, label=gene)) +
      theme_classic() +
      geom_point(
        color=dplyr::case_when(
          (xy.comp.TAL$avg_log2FC.x > 1 & xy.comp.TAL$avg_log2FC.y > 1) ~ "#1b9e77", #sets color for df.upper points
          (xy.comp.TAL$avg_log2FC.x < -1 & xy.comp.TAL$avg_log2FC.y < -1) ~ "#d95f02", #sets color for df.lower points
          TRUE ~ "black")) +
      geom_text_repel(data=rbind(df.upper, df.lower),
                      segment.sixy.comp.TALe  = 0.2, #<--!! what is this?? !!--
                      segment.color = "grey50") +
      geom_smooth (method=lm) +
      labs(
        title = paste("Correlation of Log2FC Values of DEGs from",
                      "Mouse New TAL", "and",
                      "KPMP KRT19+ Cells", sep = " "), 
        x = paste("Average log2FC", "Mouse"), 
        y = paste("Average log2FC", "KPMP")
      ) +
      stat_poly_eq(aes(label = paste(..rr.label.., sep = "~~~")),
                   label.x.npc = "left", label.y.npc = 0.90, #set the position of the eq
                   rr.digits = 3)
    
    print(xy.comp.TAL.plot)

    print(nrow(xy.comp.TAL))
## [1] 54

3.2 Corerelation Test

cor_result <- cor.test(xy.comp.TAL$avg_log2FC.x, xy.comp.TAL$avg_log2FC.y, method = "pearson")
cor_result
## 
##  Pearson's product-moment correlation
## 
## data:  xy.comp.TAL$avg_log2FC.x and xy.comp.TAL$avg_log2FC.y
## t = 6.546, df = 52, p-value = 2.619e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4931235 0.7965416
## sample estimates:
##       cor 
## 0.6721359
Sys.time()
## [1] "2024-12-08 12:07:08 PST"
sessionInfo()
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] fastmatch_1.1-4       ggvenn_0.1.10         gplots_3.2.0         
##  [4] openxlsx_4.2.5.2      SeuratData_0.2.2.9001 viridis_0.6.4        
##  [7] viridisLite_0.4.2     RColorBrewer_1.1-3    ggupset_0.3.0        
## [10] ggpmisc_0.5.4-1       ggpp_0.5.4            gghighlight_0.4.0    
## [13] ggrepel_0.9.5         geneName_0.2.3        lubridate_1.9.2      
## [16] forcats_1.0.0         stringr_1.5.1         purrr_1.0.2          
## [19] readr_2.1.5           tidyr_1.3.1           tibble_3.2.1         
## [22] tidyverse_2.0.0       paletteer_1.5.0       here_1.0.1           
## [25] ggplot2_3.5.1         knitr_1.45            SeuratDisk_0.0.0.9021
## [28] SeuratObject_5.0.1    Seurat_4.4.0          dplyr_1.1.4          
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.22       splines_4.3.1          later_1.3.1           
##   [4] bitops_1.0-7           polyclip_1.10-6        confintr_1.0.2        
##   [7] lifecycle_1.0.4        rprojroot_2.0.4        globals_0.16.2        
##  [10] lattice_0.21-8         hdf5r_1.3.8            MASS_7.3-60           
##  [13] magrittr_2.0.3         limma_3.56.2           plotly_4.10.4         
##  [16] sass_0.4.9             rmarkdown_2.25         jquerylib_0.1.4       
##  [19] yaml_2.3.7             httpuv_1.6.11          sctransform_0.4.1     
##  [22] spam_2.10-0            zip_2.3.0              sp_2.1-3              
##  [25] spatstat.sparse_3.0-3  reticulate_1.34.0      cowplot_1.1.3         
##  [28] pbapply_1.7-2          abind_1.4-5            Rtsne_0.17            
##  [31] rappdirs_0.3.3         irlba_2.3.5.1          listenv_0.9.1         
##  [34] spatstat.utils_3.0-4   MatrixModels_0.5-3     goftest_1.2-3         
##  [37] spatstat.random_3.2-2  fitdistrplus_1.1-11    parallelly_1.36.0     
##  [40] leiden_0.4.3.1         codetools_0.2-19       tidyselect_1.2.0      
##  [43] farver_2.1.1           matrixStats_1.2.0      spatstat.explore_3.2-5
##  [46] jsonlite_1.8.8         ellipsis_0.3.2         progressr_0.14.0      
##  [49] ggridges_0.5.6         survival_3.5-5         tools_4.3.1           
##  [52] ica_1.0-3              Rcpp_1.0.12            glue_1.7.0            
##  [55] gridExtra_2.3          mgcv_1.8-42            xfun_0.40             
##  [58] withr_3.0.0            BiocManager_1.30.22    fastmap_1.1.1         
##  [61] fansi_1.0.4            SparseM_1.81           caTools_1.18.2        
##  [64] digest_0.6.33          timechange_0.2.0       R6_2.5.1              
##  [67] mime_0.12              colorspace_2.1-0       scattermore_1.2       
##  [70] gtools_3.9.5           tensor_1.5             spatstat.data_3.0-4   
##  [73] utf8_1.2.3             generics_0.1.3         data.table_1.14.10    
##  [76] httr_1.4.7             htmlwidgets_1.6.2      uwot_0.1.16           
##  [79] pkgconfig_2.0.3        gtable_0.3.4           lmtest_0.9-40         
##  [82] htmltools_0.5.8.1      dotCall64_1.1-1        scales_1.3.0          
##  [85] png_0.1-8              rstudioapi_0.15.0      tzdb_0.4.0            
##  [88] reshape2_1.4.4         nlme_3.1-162           cachem_1.0.8          
##  [91] zoo_1.8-12             KernSmooth_2.23-21     vipor_0.4.5           
##  [94] parallel_4.3.1         miniUI_0.1.1.1         ggrastr_1.0.2         
##  [97] pillar_1.9.0           vctrs_0.6.5            RANN_2.6.1            
## [100] promises_1.2.1         xtable_1.8-4           cluster_2.1.4         
## [103] beeswarm_0.4.0         evaluate_0.23          cli_3.6.1             
## [106] compiler_4.3.1         rlang_1.1.1            crayon_1.5.2          
## [109] future.apply_1.11.1    labeling_0.4.3         rematch2_2.1.2        
## [112] ggbeeswarm_0.7.2       plyr_1.8.9             stringi_1.7.12        
## [115] deldir_2.0-2           munsell_0.5.0          lazyeval_0.2.2        
## [118] spatstat.geom_3.2-8    quantreg_5.97          Matrix_1.6-5          
## [121] hms_1.1.3              patchwork_1.2.0        bit64_4.0.5           
## [124] future_1.33.1          shiny_1.8.0            highr_0.10            
## [127] ROCR_1.0-11            igraph_1.6.0           bslib_0.8.0           
## [130] bit_4.0.5              polynom_1.4-1